# Code for descriptives of network heterogeneity (by education)

# Assuming you already have tie education levels, the number of ties that people report, and
# individual education levels

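# Take out people below 25 and those who do not report the education level of a single tie
# (i.e. case control sample)

# Select wave 14
# NOTE(review): neither the age restriction nor the wave-14 selection is applied by the
# code shown in this script; presumably `full_panel` is already restricted upstream -
# confirm.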

# Work on a copy of the panel so `full_panel` itself is left untouched.
data <- full_panel

# Respondent education comes from 'oplmet' (education with diploma, i.e. completed;
# the same definition is used for the ties). Codes above 6 are set to missing and
# the result is stored as a plain numeric vector.
data$education <- as.numeric(replace(data$oplmet, data$oplmet > 6, NA))

# High vs. not-high dummy: 1 when the education code is above 4, 0 otherwise,
# NA when education is missing.
data$education_dummy <- as.numeric(data$education > 4)

# Harmonise the education codes of ties 1-5 with the respondent's categories (the
# tie variables are coded slightly differently): codes outside 2..7 become missing
# and the remaining codes 2..7 are mapped onto 1..6.
# Each tie then gets the same high vs. not-high dummy as the respondent
# (1 when the recoded value is above 4, 0 otherwise, NA when missing).

for (tie in 1:5) {
  edu_col <- paste0("pers", tie, "_edu")

  tie_edu <- data[[edu_col]]
  tie_edu[tie_edu < 2 | tie_edu > 7] <- NA
  data[[edu_col]] <- recode(tie_edu,
                            "2" = 1, "3" = 2, "4" = 3, "5" = 4, "6" = 5, "7" = 6)
}

for (tie in 1:5) {
  edu_col <- paste0("pers", tie, "_edu")
  data[[paste0(edu_col, "_dummy")]] <- ifelse(data[[edu_col]] > 4, 1, 0)
}

# Total number of ties per respondent: the sum of pers1..pers5 (presumably 0/1
# indicators that a tie was named - confirm), followed by the sample restriction.
# Rows whose total is NA are dropped by filter() as well.
#
# NOTE(review): this step was originally described as "limit sample to respondents
# who report at least one tie", but the condition keeps only respondents with
# total_ties > 2. Confirm which of the two is intended.

data <- data %>%
  mutate(total_ties = pers1 + pers2 + pers3 + pers4 + pers5) %>%
  filter(total_ties > 2)


# Flag, for each of the five ties, whether the tie's education dummy differs from
# the respondent's own dummy: 1 = different, 0 = same, NA when either is missing.

for (tie in 1:5) {
  tie_edu_dummy <- data[[paste0("pers", tie, "_edu_dummy")]]
  data[[paste0("diff_educ_", tie)]] <-
    as.numeric(data$education_dummy != tie_edu_dummy)
}

# Number of differently educated ties per respondent. Missing comparisons are
# skipped, but rowSums(..., na.rm = TRUE) returns 0 for a row in which *every*
# comparison is missing. Such a respondent (own education unknown, or no tie with
# a known education level) would otherwise look like someone with a fully
# homogeneous network, so the count is set to NA for those rows instead.

diff_educ <- data[, c("diff_educ_1", "diff_educ_2", "diff_educ_3",
                      "diff_educ_4", "diff_educ_5")]

data$total_diff_edu <- rowSums(diff_educ, na.rm = TRUE)
data$total_diff_edu[rowSums(!is.na(diff_educ)) == 0] <- NA



# Proportion outgroup by educ group: the share of a respondent's ties that are
# differently educated, plus a four-category binned version of that share.
#
# NOTE(review): the denominator is total_ties, while a tie whose education
# comparison is missing adds nothing to total_diff_edu. Ties with unknown
# education therefore count as same-education ties - confirm this is intended.

data$proportion_outgroup <- data$total_diff_edu / data$total_ties

# The four conditions are mutually exclusive, so the order of assignment is
# irrelevant; a missing share stays NA.
outgroup_share <- data$proportion_outgroup
outgroup_bin <- rep(NA, length(outgroup_share))

outgroup_bin[which(outgroup_share > 0.5)] <-
  "More than 50% differently educated ties"
outgroup_bin[which(outgroup_share > 0.25 & outgroup_share <= 0.5)] <-
  "25-50% ties with different education"
outgroup_bin[which(outgroup_share > 0 & outgroup_share <= 0.25)] <-
  "0-25% ties with different education"
outgroup_bin[which(outgroup_share == 0)] <-
  "Fully homogenous"

data$prop_outgroup_bins <- outgroup_bin

# Turn the respondent's 0/1 education dummy into a labelled, ordered factor with
# "Higher-educated" as the first level.
# The codes are mapped to their labels explicitly (1 -> "Higher-educated",
# 0 -> "Lower-educated"). Relabelling by position via levels<- would attach the
# wrong label whenever only one of the two codes occurs in the data, and would swap
# the labels if this section were run a second time; the is.factor() guard makes
# the step safe to re-run.

if (!is.factor(data$education_dummy)) {
  data$education_dummy <- factor(data$education_dummy,
                                 levels = c(1, 0),
                                 labels = c("Higher-educated", "Lower-educated"),
                                 ordered = TRUE)
}

# Ordered factor for the outgroup-share bins, from most to least heterogeneous.

data$prop_outgroup_bins <- factor(data$prop_outgroup_bins, ordered = TRUE,
                                  levels = c("More than 50% differently educated ties",
                                             "25-50% ties with different education",
                                             "0-25% ties with different education",
                                             "Fully homogenous"))

# Cross-tabulate outgroup bin (rows) by education group (columns), convert the
# counts to column proportions (margin = 2, i.e. shares within each education
# group) and reshape to long format with columns Var1 (bin), Var2 (education
# group) and Freq (proportion). Var2 is stored as character rather than factor.
plot_df <- as.data.frame(
  prop.table(
    table(data$prop_outgroup_bins, data$education_dummy),
    margin = 2
  )
)
plot_df[["Var2"]] <- as.character(plot_df[["Var2"]])

# Reusable ggplot2 theme: white panel without grid lines or axis ticks, black
# size-20 text, and the "CM Roman" font family for title, axis and legend text.
# The helper lives inside local() so it does not leak into the workspace.
theme_mechs <- local({
  # Text element in the "CM Roman" family; size defaults to 20.
  cm_text <- function(size = 20, ...) {
    element_text(size = size, family = "CM Roman", ...)
  }

  theme(
    text = element_text(size = 20, colour = "black"),
    plot.title = cm_text(hjust = 0.5),
    axis.text = cm_text(color = "black"),
    axis.title.x = cm_text(color = "black"),
    axis.title.y = cm_text(color = "black"),
    legend.text = cm_text(size = 12),
    panel.background = element_rect(fill = "white"),
    panel.grid = element_blank(),
    axis.ticks = element_blank()
  )
})

# Colours: one shade per outgroup bin, interpolated from grey to black in the
# order in which the bins appear in plot_df$Var1, stored as a named vector
# (names = bin labels) for scale_fill_manual().
categories <- unique(plot_df[["Var1"]])
generate_colors <- colorRampPalette(colors = c("grey", "black"))
gradient_colors <- generate_colors(n = length(categories))
color_mapping <- setNames(object = gradient_colors, nm = categories)


# Horizontal stacked bars: one bar per education group (Var2), segments are the
# shares (Freq) of respondents in each outgroup bin (Var1). The plot is left as
# a bare top-level expression so that it is printed when the script is run.
ggplot(plot_df, aes(x = Var2, y = Freq, fill = Var1, group = Var1)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_fill_manual(values = color_mapping) +
  labs(
    x = "",
    y = "Proportion of the population in homogeneous-heterogeneous networks",
    fill = "",
    group = ""
  ) +
  guides(fill = guide_legend(nrow = 2, byrow = TRUE)) +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 14),
    axis.text = element_text(size = 16),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16)
  )



## Play around

# Voting radical parties: 1 when the party recorded in elec_rec (resp. elec_today)
# is PVV, SP, JA21 or FvD, 0 for any other recorded party. A missing party stays
# NA (%in% alone would turn it into 0, hence the explicit is.na() guard).

data$vote_rad <- ifelse(
  is.na(data$elec_rec),
  NA,
  as.numeric(data$elec_rec %in% c("PVV", "SP", "JA21", "FvD"))
)

data$vote_rad_today <- ifelse(
  is.na(data$elec_today),
  NA,
  as.numeric(data$elec_today %in% c("PVV", "SP", "JA21", "FvD"))
)

# Network vulnerability index - education, class, unemployment (not in this data)

# Profession dummy per tie: 1 when the tie's profession code is 6, 7 or 8, else 0.
# Because %in% never returns NA, a missing profession is coded 0 as well.
for (tie in 1:5) {
  data[[paste0("pers", tie, "_prof_dummy")]] <-
    ifelse(data[[paste0("pers", tie, "_prof")]] %in% c(6, 7, 8), 1, 0)
}

# Vulnerability score per tie:
#   2 = not-high education AND profession code in 6-8
#   1 = exactly one of the two (also when education is missing but the
#       profession dummy is 1)
#   0 = high education AND profession dummy 0
#  NA = education missing and profession dummy 0
for (tie in 1:5) {
  tie_edu <- data[[paste0("pers", tie, "_edu_dummy")]]
  tie_prof <- data[[paste0("pers", tie, "_prof_dummy")]]

  tie_vul <- rep(NA, nrow(data))
  tie_vul[which(tie_edu == 0 | tie_prof == 1)] <- 1
  tie_vul[which(tie_edu == 0 & tie_prof == 1)] <- 2
  tie_vul[which(tie_edu == 1 & tie_prof == 0)] <- 0

  data[[paste0("pers", tie, "_vul")]] <- tie_vul
}

# Respondent-level index: sum of the five tie scores, ignoring missing scores.
# NOTE(review): with na.rm = TRUE a respondent whose tie scores are all missing
# gets an index of 0 - confirm that this is intended.
data$network_vulnerability <- rowSums(
  data[, c("pers1_vul", "pers2_vul", "pers3_vul", "pers4_vul", "pers5_vul")],
  na.rm = TRUE
)

# Linear model of radical-party voting (vote_rad) on the network vulnerability
# index, the number of ties and respondent-level controls, clustered on the
# respondent identifier nomem_encr.
# The clustering argument is spelled out in full as `clusters`; the earlier
# `cluster =` only worked through R's partial argument matching.
# NOTE(review): assumes lm_robust is estimatr::lm_robust - confirm.
x <- lm_robust(
  vote_rad ~ network_vulnerability + total_ties + nettoink + gender + age +
    urban + employed + education + pol_int + year + profession_404,
  data = data,
  clusters = nomem_encr
)

## Then: associations






